Prepare pbulk gene expression data for DE analysis
library(knitr)
library(SummarizedExperiment)
library(limma)
library(edgeR)
library(dplyr)
library(ggplot2)
library(BiocParallel)
library(doParallel)
param = SnowParam(32,"SOCK",progressbar = T)
register(param)
registerDoParallel()
options(future.globals.maxSize = 120 * 1024 ^3)
knitr::opts_chunk$set(echo = TRUE)
options(stringsAsFactors = FALSE)
opts_knit$set(echo = TRUE, message = FALSE, warning = FALSE, cache.lazy = FALSE, out.width=900)
opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, cache.lazy = FALSE, out.width=900)
output.folder <- file.path("output","CITE-seq")
dir.create(output.folder)
## Warning in dir.create(output.folder): 'output/CITE-seq' already exists
pseudobulk_list <- readRDS(file.path("data","covid_flu.CITEseq.pseudobulk.gene.expr.RDS"))
Filtering and normalization of count data, and create a batch-corrected gene expression dataset for visualization purpose
var.threshold <- 1
pseudobulk_list_normalized <- list()
pseudobulk_list_filtered <- list()
for (i in names(pseudobulk_list)) {
cat(i,"-\n")
eset <- pseudobulk_list[[i]]
geneExpr <- eset$counts
sample.info <- eset$samples
sample.info$sample.id <- paste0(sample.info$alt.subject.id,".",sample.info$visit)
# filter out lowly expressed genes
cat("Number of genes with no counts across samples:\n")
print(table(rowSums(eset$counts==0)==nrow(sample.info)))
keep.exprs <- filterByExpr(eset,group = sample.info$visit.overall.group.sex, min.count = 2,min.prop=0.5)
cat("Number of genes to keep by expr:",sum(keep.exprs),"\n")
eset <- eset[keep.exprs,,keep.lib.sizes = F]
# stage 1: normalize and correct
eset <- calcNormFactors(eset)
sample.info$visit.overall.group.sex <- ifelse(is.na(sample.info$visit.overall.group.sex),"Control",as.character(sample.info$visit.overall.group.sex))
# a model that accommodate the control samples
tmp.model <- model.matrix(~ 0 + visit.overall.group.sex,sample.info)
vobj <- voom(eset ,design = tmp.model,plot = F)
#title("",i)
geneExpr <- vobj$E # normalized
# PCA using baseline samples before correction
baseline.samples <- subset(sample.info,!(visit %in% c("Day 1","Day 28")))
baseline.geneExpr <- geneExpr[,rownames(baseline.samples)]
baseline.geneExpr.pca <- prcomp(t(baseline.geneExpr),center = T,scale. = T)
baseline.samples <- cbind(baseline.samples,baseline.geneExpr.pca$x[rownames(baseline.samples),1:2])
print(ggplot(baseline.samples,aes(PC1,PC2)) + geom_point(aes(color=paste0(Batch),shape=alt.subject.id == "Control"),size=2,alpha=0.8) +
ggtitle(paste0("Before correction: ",i)))
cat("Before correction:\n")
print(car::Anova(lm(PC1 ~ n_barcodes + Batch,baseline.samples)))
print(car::Anova(lm(PC2 ~ n_barcodes + Batch,baseline.samples)))
# remove batch effect
normalized.expr.batch.effect <- removeBatchEffect(geneExpr, batch = eset$samples$Batch,
covariates = eset$samples$n_barcodes,
design = tmp.model)
geneExpr <- normalized.expr.batch.effect
eset$normalizedExpr <- geneExpr
pseudobulk_list_normalized[[i]] <- eset
# PCA using baseline samples after correction
baseline.samples <- subset(sample.info,!(visit %in% c("Day 1","Day 28")))
baseline.geneExpr <- geneExpr[,rownames(baseline.samples)]
baseline.geneExpr.pca <- prcomp(t(baseline.geneExpr),center = T,scale. = T)
baseline.samples <- cbind(baseline.samples,baseline.geneExpr.pca$x[rownames(baseline.samples),1:2])
print(ggplot(baseline.samples,aes(PC1,PC2)) + geom_point(aes(color=paste0(Batch),shape=alt.subject.id == "Control"),size=2,alpha=0.8) +
ggtitle(paste0("After correction: ",i)))
cat("After correction:\n")
print(car::Anova(lm(PC1 ~ n_barcodes + Batch,baseline.samples)))
print(car::Anova(lm(PC2 ~ n_barcodes + Batch,baseline.samples)))
# stage 2: filter out genes by technical noise by leveraging repeats
if (sum(table(sample.info$sample.id) > 1) > 0) {
geneExpr.var <- apply(geneExpr,1,var)
repeat.geneExpr.var <- data.frame(id=sample.info$sample.id,t(geneExpr)) %>% group_by(id) %>% summarise(across(where(is.numeric),var))
repeat.geneExpr.var <- apply(as.data.frame(repeat.geneExpr.var[,-1]),2,mean,na.rm=T)
plot(log2(repeat.geneExpr.var+1),log2(geneExpr.var+1),main=i)
abline(coef=c(0,1),col="red")
keep.var <- geneExpr.var > repeat.geneExpr.var*var.threshold
cat("Number of genes to keep by var:",sum(keep.var),"\n")
eset <- eset[keep.var,,keep.lib.sizes = F]
geneExpr <- geneExpr[keep.var,]
eset$normalizedExpr <- geneExpr
}
pseudobulk_list_filtered[[i]] <- eset
}
## B -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 22081 6321
## Number of genes to keep by expr: 13286
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 71 1 0.7015 0.4071
## Batch 32259 2 159.6085 <2e-16 ***
## Residuals 4143 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 2507.8 1 3.8744 0.05581 .
## Batch 1219.3 2 0.9418 0.39818
## Residuals 26538.1 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 200 1 0.2503 0.6195
## Batch 233 2 0.1459 0.8647
## Residuals 32742 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 0.2 1 0.0003 0.9854
## Batch 3.6 2 0.0029 0.9971
## Residuals 25504.4 41
## Number of genes to keep by var: 9344
## B_Mem -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 20155 8247
## Number of genes to keep by expr: 11707
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 2635.5 1 14.635 0.000437 ***
## Batch 9141.5 2 25.381 6.722e-08 ***
## Residuals 7383.5 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 340.1 1 0.7094 0.4045
## Batch 1206.5 2 1.2583 0.2949
## Residuals 19657.2 41
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 196.3 1 0.3565 0.5537
## Batch 426.1 2 0.3870 0.6815
## Residuals 22570.7 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 65.5 1 0.1353 0.7149
## Batch 156.5 2 0.1615 0.8514
## Residuals 19860.3 41
## Number of genes to keep by var: 6045
## B_Naive -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 20734 7668
## Number of genes to keep by expr: 12097
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 31.2 1 0.4065 0.5273
## Batch 28130.6 2 183.2146 <2e-16 ***
## Residuals 3147.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 9377.9 1 23.1484 2.054e-05 ***
## Batch 790.0 2 0.9751 0.3857
## Residuals 16610.0 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1071.2 1 1.7628 0.1916
## Batch 577.7 2 0.4754 0.6250
## Residuals 24914.8 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 177.0 1 0.3139 0.5784
## Batch 38.3 2 0.0340 0.9666
## Residuals 23114.5 41
## Number of genes to keep by var: 9436
## B_Naive_Intermediate -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 18457 9945
## Number of genes to keep by expr: 10295
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 5613.9 1 22.0304 2.987e-05 ***
## Batch 3349.6 2 6.5724 0.003343 **
## Residuals 10447.9 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 2988.2 1 9.1398 0.004300 **
## Batch 3837.3 2 5.8684 0.005738 **
## Residuals 13404.7 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1147.9 1 2.0510 0.1597
## Batch 934.3 2 0.8346 0.4413
## Residuals 22947.0 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 1174.8 1 2.9229 0.09489 .
## Batch 895.2 2 1.1136 0.33812
## Residuals 16479.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Number of genes to keep by var: 6274
## CD4 -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 24592 3810
## Number of genes to keep by expr: 16414
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 328 1 5.931 0.01931 *
## Batch 79225 2 716.196 < 2e-16 ***
## Residuals 2268 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 20211.1 1 33.0264 9.909e-07 ***
## Batch 3144.4 2 2.5691 0.08888 .
## Residuals 25090.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 19299 1 19.2062 7.961e-05 ***
## Batch 833 2 0.4147 0.6632
## Residuals 41198 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 2502 1 2.6768 0.1095
## Batch 484 2 0.2592 0.7730
## Residuals 38316 41
## Number of genes to keep by var: 12577
## CD4_CM -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 22678 5724
## Number of genes to keep by expr: 13924
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 41 1 0.9274 0.3412
## Batch 46150 2 517.4058 <2e-16 ***
## Residuals 1828 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 19167.4 1 68.3917 2.831e-10 ***
## Batch 4094.5 2 7.3049 0.001934 **
## Residuals 11490.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 17553.6 1 39.3929 1.75e-07 ***
## Batch 1544.3 2 1.7328 0.1895
## Residuals 18269.8 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 709.9 1 1.2340 0.2731
## Batch 468.4 2 0.4071 0.6682
## Residuals 23585.5 41
## Number of genes to keep by var: 10495
## CD4_EM -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 21037 7365
## Number of genes to keep by expr: 12514
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 324.4 1 4.27 0.04515 *
## Batch 19929.8 2 131.16 < 2e-16 ***
## Residuals 3115.1 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 5819.3 1 11.4757 0.001567 **
## Batch 5150.5 2 5.0784 0.010704 *
## Residuals 20790.9 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1170.7 1 1.5841 0.2153
## Batch 303.0 2 0.2050 0.8155
## Residuals 30299.5 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 568.5 1 0.9127 0.3450
## Batch 339.3 2 0.2724 0.7629
## Residuals 25539.9 41
## Number of genes to keep by var: 8040
## CD4_Naive -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 23214 5188
## Number of genes to keep by expr: 14862
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 2362 1 41.939 9.101e-08 ***
## Batch 58024 2 515.197 < 2.2e-16 ***
## Residuals 2309 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 20886 1 53.9884 5.346e-09 ***
## Batch 4451 2 5.7526 0.006279 **
## Residuals 15862 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 7155.1 1 10.1625 0.002743 **
## Batch 91.4 2 0.0649 0.937252
## Residuals 28866.7 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 32.4 1 0.0445 0.8341
## Batch 1234.4 2 0.8478 0.4357
## Residuals 29849.4 41
## Number of genes to keep by var: 10442
## CD4_platelet_bind -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 18819 9583
## Number of genes to keep by expr: 10420
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 9597.6 1 31.8751 1.379e-06 ***
## Batch 3491.2 2 5.7974 0.006064 **
## Residuals 12345.1 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 210.3 1 3.6266 0.06389 .
## Batch 13348.3 2 115.0904 < 2e-16 ***
## Residuals 2377.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 28.0 1 0.0613 0.8056
## Batch 509.9 2 0.5575 0.5769
## Residuals 18750.4 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 1553.1 1 4.5524 0.0389 *
## Batch 162.1 2 0.2376 0.7896
## Residuals 13987.1 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Number of genes to keep by var: 6889
## CD4_Tfh -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 16099 12303
## Number of genes to keep by expr: 7391
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 7328.5 1 41.6731 1.719e-07 ***
## Batch 8.0 2 0.0228 0.9775
## Residuals 6330.8 36
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 1567.2 1 9.9111 0.003295 **
## Batch 4402.5 2 13.9213 3.322e-05 ***
## Residuals 5692.4 36
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 7 1 0.0207 0.8865
## Batch 104 2 0.1527 0.8589
## Residuals 12260 36
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 0.1 1 0.0003 0.9861
## Batch 120.2 2 0.2291 0.7964
## Residuals 9441.3 36
## Number of genes to keep by var: 4456
## CD4_Treg -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 19005 9397
## Number of genes to keep by expr: 10798
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 11079.9 1 37.126 3.189e-07 ***
## Batch 365.3 2 0.612 0.5471
## Residuals 12236.0 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 1635.6 1 8.778 0.005056 **
## Batch 11819.0 2 31.715 4.744e-09 ***
## Residuals 7639.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1687.6 1 3.5470 0.06676 .
## Batch 326.0 2 0.3425 0.71198
## Residuals 19507.2 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 215.8 1 0.4588 0.502
## Batch 195.8 2 0.2081 0.813
## Residuals 19288.0 41
## Number of genes to keep by var: 6613
## CD8 -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 23595 4807
## Number of genes to keep by expr: 14934
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 7290 1 4.8730 0.032934 *
## Batch 21213 2 7.0904 0.002267 **
## Residuals 61333 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 3779 1 13.608 0.0006553 ***
## Batch 41476 2 74.677 2.143e-14 ***
## Residuals 11386 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 873 1 0.4143 0.5234
## Batch 988 2 0.2343 0.7922
## Residuals 86404 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 6 1 0.0060 0.9384
## Batch 71 2 0.0389 0.9619
## Residuals 37548 41
## Number of genes to keep by var: 10944
## CD8_CM -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 20014 8388
## Number of genes to keep by expr: 11511
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 9707.2 1 32.0403 1.314e-06 ***
## Batch 4423.9 2 7.3009 0.00194 **
## Residuals 12421.8 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 1060.3 1 3.1909 0.08145 .
## Batch 14440.4 2 21.7286 3.681e-07 ***
## Residuals 13623.9 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 29.7 1 0.0439 0.8351
## Batch 359.0 2 0.2649 0.7686
## Residuals 27775.4 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 70.8 1 0.1296 0.7207
## Batch 38.8 2 0.0355 0.9651
## Residuals 22412.0 41
## Number of genes to keep by var: 7996
## CD8_EM -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 21356 7046
## Number of genes to keep by expr: 12441
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 16138.5 1 28.4207 3.844e-06 ***
## Batch 4372.7 2 3.8503 0.02935 *
## Residuals 23281.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 3377.6 1 11.258 0.001717 **
## Batch 29767.9 2 49.613 1.127e-11 ***
## Residuals 12300.1 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 3350 1 3.7591 0.05943 .
## Batch 1226 2 0.6878 0.50837
## Residuals 36534 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 215.8 1 0.2937 0.5908
## Batch 239.6 2 0.1630 0.8501
## Residuals 30123.0 41
## Number of genes to keep by var: 7351
## CD8_EM:GPR56hi -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 19583 8819
## Number of genes to keep by expr: 10402
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 7244 1 9.1612 0.004259 **
## Batch 407 2 0.2577 0.774095
## Residuals 32417 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 16.8 1 0.1015 0.7517
## Batch 15694.0 2 47.3692 2.196e-11 ***
## Residuals 6791.9 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1162 1 1.2699 0.2663
## Batch 1071 2 0.5852 0.5616
## Residuals 37502 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 19.6 1 0.0398 0.8428
## Batch 472.0 2 0.4807 0.6218
## Residuals 20127.9 41
## Number of genes to keep by var: 5997
## CD8_EM:GPR56lo -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 20473 7929
## Number of genes to keep by expr: 11660
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 16001.0 1 37.2709 3.067e-07 ***
## Batch 6556.6 2 7.6361 0.001517 **
## Residuals 17602.0 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 523.5 1 3.8171 0.05758 .
## Batch 25826.4 2 94.1636 4.707e-16 ***
## Residuals 5622.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 2976.4 1 4.3688 0.04285 *
## Batch 508.0 2 0.3728 0.69110
## Residuals 27932.1 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 403.3 1 0.6833 0.4132
## Batch 442.6 2 0.3749 0.6897
## Residuals 24200.5 41
## Number of genes to keep by var: 6902
## CD8_Naive -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 21424 6978
## Number of genes to keep by expr: 12992
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 10547.8 1 16.6157 0.000205 ***
## Batch 4994.2 2 3.9336 0.027366 *
## Residuals 26027.2 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 29.0 1 0.1931 0.6627
## Batch 30686.0 2 102.1766 <2e-16 ***
## Residuals 6156.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 206 1 0.2185 0.6427
## Batch 1276 2 0.6772 0.5136
## Residuals 38635 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 585.4 1 0.9417 0.3375
## Batch 494.1 2 0.3974 0.6746
## Residuals 25486.2 41
## Number of genes to keep by var: 9229
## CD8_proliferating -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 15952 12450
## Number of genes to keep by expr: 9499
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 4174.3 1 5.4806 0.03094 *
## Batch 350.7 2 0.2302 0.79668
## Residuals 13709.9 18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 434.7 1 0.7710 0.3915
## Batch 1858.4 2 1.6481 0.2202
## Residuals 10148.7 18
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 666.0 1 0.7121 0.4098
## Batch 802.6 2 0.4290 0.6576
## Residuals 16835.7 18
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 549.3 1 0.8367 0.3724
## Batch 371.6 2 0.2830 0.7568
## Residuals 11817.3 18
## Number of genes to keep by var: 6636
## CD8_TEMRA -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 20200 8202
## Number of genes to keep by expr: 11303
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 11052.3 1 23.1575 2.048e-05 ***
## Batch 2593.4 2 2.7169 0.07798 .
## Residuals 19567.9 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 111.8 1 0.590 0.4468
## Batch 18545.1 2 48.928 1.379e-11 ***
## Residuals 7770.1 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 241.7 1 0.3674 0.5477
## Batch 308.2 2 0.2342 0.7922
## Residuals 26974.5 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 3.3 1 0.0057 0.9404
## Batch 388.7 2 0.3349 0.7173
## Residuals 23788.4 41
## Number of genes to keep by var: 6790
## CD8_TRM -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 17231 11171
## Number of genes to keep by expr: 9112
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 12005.0 1 39.8568 1.715e-07 ***
## Batch 1143.8 2 1.8988 0.163
## Residuals 12048.1 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 34.4 1 0.4341 0.5137
## Batch 10628.0 2 66.9987 1.7e-13 ***
## Residuals 3172.6 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 512.2 1 1.1736 0.2851
## Batch 164.9 2 0.1889 0.8286
## Residuals 17457.5 40
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 1.8 1 0.0058 0.9394
## Batch 13.3 2 0.0214 0.9789
## Residuals 12481.3 40
## Number of genes to keep by var: 5926
## cDC -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 20665 7737
## Number of genes to keep by expr: 12241
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 0.3 1 0.0101 0.9205
## Batch 26364.6 2 444.1075 <2e-16 ***
## Residuals 1187.3 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 7158.3 1 16.1734 0.0002491 ***
## Batch 510.3 2 0.5764 0.5664986
## Residuals 17703.8 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 2913.9 1 5.0692 0.02992 *
## Batch 781.6 2 0.6799 0.51244
## Residuals 22993.3 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 48.5 1 0.0964 0.7578
## Batch 144.8 2 0.1439 0.8664
## Residuals 20120.5 40
## Number of genes to keep by var: 7013
## gdT-Vd2 -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 19815 8587
## Number of genes to keep by expr: 10989
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 17330.9 1 28.7944 3.665e-06 ***
## Batch 2678.8 2 2.2254 0.1212
## Residuals 24075.3 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 5.5 1 0.0099 0.9214
## Batch 237.7 2 0.2129 0.8092
## Residuals 22328.1 40
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 255 1 0.3092 0.5813
## Batch 669 2 0.4053 0.6695
## Residuals 33023 40
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 103.6 1 0.1687 0.6835
## Batch 167.1 2 0.1360 0.8732
## Residuals 24569.2 40
## Number of genes to keep by var: 5455
## HSPC -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 17215 11187
## Number of genes to keep by expr: 10598
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 4159.0 1 12.7047 0.001943 **
## Batch 6074.5 2 9.2779 0.001411 **
## Residuals 6547.3 20
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 238.4 1 0.3711 0.5493
## Batch 540.1 2 0.4204 0.6625
## Residuals 12846.8 20
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 677.1 1 0.9820 0.3335
## Batch 132.3 2 0.0959 0.9090
## Residuals 13790.9 20
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 0.0 1 0.0000 0.9965
## Batch 1743.8 2 1.3694 0.2771
## Residuals 12733.5 20
## ILC -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 13806 14596
## Number of genes to keep by expr: 7465
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 5565.9 1 16.7964 0.001477 **
## Batch 182.1 2 0.2747 0.764418
## Residuals 3976.5 12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 136.2 1 0.2038 0.6597
## Batch 278.9 2 0.2087 0.8145
## Residuals 8019.3 12
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1254.3 1 3.4266 0.08891 .
## Batch 3830.2 2 5.2317 0.02324 *
## Residuals 4392.7 12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 213.9 1 0.3159 0.5844
## Batch 1046.5 2 0.7729 0.4833
## Residuals 8124.1 12
## Number of genes to keep by var: 5619
## Mac-or-Mono -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 18956 9446
## Number of genes to keep by expr: 9403
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 408.5 1 3.2719 0.07839 .
## Batch 6645.0 2 26.6106 5.944e-08 ***
## Residuals 4744.5 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 2154.7 1 4.3638 0.04346 *
## Batch 4026.8 2 4.0777 0.02487 *
## Residuals 18763.1 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 358.8 1 0.5915 0.4466
## Batch 505.0 2 0.4162 0.6625
## Residuals 23051.4 38
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 8.4 1 0.0182 0.8934
## Batch 16.1 2 0.0175 0.9826
## Residuals 17484.4 38
## Number of genes to keep by var: 6086
## MAIT -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 20101 8301
## Number of genes to keep by expr: 11448
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 16578 1 16.2854 0.0002321 ***
## Batch 7967 2 3.9133 0.0278365 *
## Residuals 41736 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 295.2 1 0.3934 0.5340
## Batch 595.4 2 0.3967 0.6751
## Residuals 30763.7 41
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 4234 1 3.4360 0.071 .
## Batch 3200 2 1.2983 0.284
## Residuals 50520 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 729 1 0.8943 0.3498
## Batch 34 2 0.0209 0.9794
## Residuals 33403 41
## Number of genes to keep by var: 6721
## Mono -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 24334 4068
## Number of genes to keep by expr: 15992
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1681 1 10.941 0.001964 **
## Batch 52854 2 171.994 < 2.2e-16 ***
## Residuals 6300 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 9064 1 7.2687 0.01013 *
## Batch 9582 2 3.8421 0.02955 *
## Residuals 51127 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1163 1 0.6990 0.4080
## Batch 6 2 0.0019 0.9981
## Residuals 68220 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 2694 1 2.3374 0.1340
## Batch 213 2 0.0923 0.9121
## Residuals 47248 41
## Number of genes to keep by var: 12468
## Mono_Classical -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 23665 4737
## Number of genes to keep by expr: 15069
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 9040 1 9.3335 0.0039451 **
## Batch 17593 2 9.0814 0.0005433 ***
## Residuals 39713 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 4789 1 7.4918 0.009123 **
## Batch 39902 2 31.2112 5.787e-09 ***
## Residuals 26208 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 3562 1 1.9313 0.1721
## Batch 42 2 0.0114 0.9886
## Residuals 75622 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 887 1 0.7049 0.406
## Batch 41 2 0.0162 0.984
## Residuals 51581 41
## Number of genes to keep by var: 11499
## Mono_Intermediate -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 21149 7253
## Number of genes to keep by expr: 12147
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 9737.4 1 12.8837 0.000895 ***
## Batch 1854.4 2 1.2268 0.304037
## Residuals 30231.8 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 1229.4 1 15.585 0.000311 ***
## Batch 26708.1 2 169.291 < 2.2e-16 ***
## Residuals 3155.3 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 6745 1 7.4542 0.009366 **
## Batch 44 2 0.0246 0.975760
## Residuals 36194 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 652.5 1 1.0892 0.3029
## Batch 20.6 2 0.0172 0.9829
## Residuals 23964.2 40
## Number of genes to keep by var: 8147
## Mono_NonClassical -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 21087 7315
## Number of genes to keep by expr: 12611
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1689.4 1 16.567 0.0002088 ***
## Batch 21141.7 2 103.658 < 2.2e-16 ***
## Residuals 4181.1 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 4962.5 1 11.627 0.0014712 **
## Batch 7050.2 2 8.259 0.0009685 ***
## Residuals 17499.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 319.2 1 0.4677 0.4979
## Batch 552.5 2 0.4048 0.6698
## Residuals 27982.0 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 2496.1 1 4.4909 0.04018 *
## Batch 670.5 2 0.6032 0.55186
## Residuals 22788.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Number of genes to keep by var: 8204
## Mono-T-dblt -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 19141 9261
## Number of genes to keep by expr: 10254
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 12986.3 1 33.7193 8.144e-07 ***
## Batch 56.5 2 0.0734 0.9294
## Residuals 15790.3 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 368.4 1 1.3052 0.25990
## Batch 2333.9 2 4.1344 0.02314 *
## Residuals 11572.4 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 6.3 1 0.0108 0.9177
## Batch 157.9 2 0.1344 0.8746
## Residuals 24076.8 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 1129.7 1 3.1486 0.08342 .
## Batch 6.2 2 0.0087 0.99135
## Residuals 14710.4 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Number of genes to keep by var: 6041
## Neut -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 15883 12519
## Number of genes to keep by expr: 6335
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 3262.9 1 16.8497 0.0002606 ***
## Batch 1398.9 2 3.6118 0.0385146 *
## Residuals 6196.8 32
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 2512.5 1 10.1421 0.003223 **
## Batch 1944.3 2 3.9241 0.029912 *
## Residuals 7927.4 32
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 822.8 1 2.5130 0.1227
## Batch 225.4 2 0.3442 0.7114
## Residuals 10477.9 32
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 697.7 1 2.3484 0.1352
## Batch 67.6 2 0.1138 0.8928
## Residuals 9507.0 32
## Number of genes to keep by var: 3884
## NK -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 22909 5493
## Number of genes to keep by expr: 14234
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 29 1 0.2066 0.6518
## Batch 58497 2 204.9108 <2e-16 ***
## Residuals 5852 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 9422.1 1 14.9320 0.0003893 ***
## Batch 251.4 2 0.1992 0.8201974
## Residuals 25870.9 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 3206 1 3.6900 0.06171 .
## Batch 13 2 0.0077 0.99236
## Residuals 35618 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 676.4 1 0.8830 0.3529
## Batch 304.2 2 0.1986 0.8207
## Residuals 31405.0 41
## Number of genes to keep by var: 10902
## NK_CD16hi -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 22656 5746
## Number of genes to keep by expr: 13809
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 56 1 0.403 0.5291
## Batch 56044 2 199.950 <2e-16 ***
## Residuals 5746 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 5761.1 1 8.4537 0.005855 **
## Batch 68.2 2 0.0501 0.951223
## Residuals 27940.9 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 2495 1 2.8743 0.09759 .
## Batch 4 2 0.0026 0.99744
## Residuals 35592 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 31.3 1 0.0417 0.8391
## Batch 477.5 2 0.3185 0.7290
## Residuals 30733.6 41
## Number of genes to keep by var: 10527
## NK_CD56hiCD16lo -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 18299 10103
## Number of genes to keep by expr: 9998
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 3351.2 1 9.7708 0.003254 **
## Batch 2316.7 2 3.3772 0.043883 *
## Residuals 14062.3 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 164.1 1 2.711 0.1073
## Batch 14784.7 2 122.145 <2e-16 ***
## Residuals 2481.4 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 9.6 1 0.0213 0.8846
## Batch 259.2 2 0.2891 0.7504
## Residuals 18378.4 41
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 0.3 1 0.0007 0.9793
## Batch 18.0 2 0.0224 0.9778
## Residuals 16427.7 41
## Number of genes to keep by var: 6488
## NK_proliferating -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 16249 12153
## Number of genes to keep by expr: 8594
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 5088.7 1 17.0273 0.0002695 ***
## Batch 1506.2 2 2.5199 0.0973628 .
## Residuals 8965.7 30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 808.4 1 5.6104 0.02449 *
## Batch 7071.8 2 24.5407 4.85e-07 ***
## Residuals 4322.5 30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 369.3 1 0.7951 0.3797
## Batch 301.0 2 0.3240 0.7257
## Residuals 13934.9 30
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 118.5 1 0.2846 0.5977
## Batch 18.6 2 0.0223 0.9780
## Residuals 12494.9 30
## Number of genes to keep by var: 4975
## pDC -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 18978 9424
## Number of genes to keep by expr: 10776
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 4917.0 1 21.654 3.548e-05 ***
## Batch 4560.2 2 10.041 0.0002926 ***
## Residuals 9082.9 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 4124.5 1 29.584 2.896e-06 ***
## Batch 12768.1 2 45.792 4.541e-11 ***
## Residuals 5576.6 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 529.7 1 1.1665 0.2866
## Batch 2.3 2 0.0025 0.9975
## Residuals 18163.7 40
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 667.2 1 1.5508 0.2203
## Batch 435.4 2 0.5060 0.6067
## Residuals 17210.2 40
## Number of genes to keep by var: 6819
## Plasmablast -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 15840 12562
## Number of genes to keep by expr: 9451
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 1291.3 1 2.6874 0.129404
## Batch 7852.6 2 8.1714 0.006684 **
## Residuals 5285.4 11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 1432.0 1 2.3918 0.1502
## Batch 2478.7 2 2.0700 0.1726
## Residuals 6585.9 11
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 23.3 1 0.0228 0.8828
## Batch 1706.9 2 0.8321 0.4608
## Residuals 11282.7 11
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 11.8 1 0.0166 0.8998
## Batch 3490.4 2 2.4626 0.1307
## Residuals 7795.5 11
## Platelet -
## Number of genes with no counts across samples:
##
## FALSE TRUE
## 15772 12630
## Number of genes to keep by expr: 5702
## Before correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 15148.3 1 49.876 1.346e-08 ***
## Batch 8068.6 2 13.283 3.569e-05 ***
## Residuals 12452.4 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 3893.0 1 47.666 2.251e-08 ***
## Batch 6061.8 2 37.110 6.320e-10 ***
## Residuals 3348.6 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## After correction:
## Anova Table (Type II tests)
##
## Response: PC1
## Sum Sq Df F value Pr(>F)
## n_barcodes 2417.5 1 4.7877 0.03442 *
## Batch 144.5 2 0.1431 0.86709
## Residuals 20702.2 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
##
## Response: PC2
## Sum Sq Df F value Pr(>F)
## n_barcodes 947.5 1 4.8891 0.03266 *
## Batch 482.1 2 1.2437 0.29897
## Residuals 7945.8 41
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Number of genes to keep by var: 3352
saveRDS(pseudobulk_list_normalized,file.path(output.folder,paste0("covid_flu.CITEseq.pseudobulk.gene.expr.normalized.RDS")))
saveRDS(pseudobulk_list_filtered,file.path(output.folder,paste0("covid_flu.CITEseq.pseudobulk.gene.expr.filtered.RDS")))
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server 7.9 (Maipo)
##
## Matrix products: default
## BLAS/LAPACK: /sysapps/cluster/software/Anaconda3/2020.07/envs/R-4.1.0/lib/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] doParallel_1.0.16 iterators_1.0.13
## [3] foreach_1.5.1 BiocParallel_1.26.2
## [5] ggplot2_3.3.5 dplyr_1.0.8
## [7] edgeR_3.34.1 limma_3.48.3
## [9] SummarizedExperiment_1.22.0 Biobase_2.52.0
## [11] GenomicRanges_1.44.0 GenomeInfoDb_1.28.1
## [13] IRanges_2.26.0 S4Vectors_0.30.0
## [15] BiocGenerics_0.38.0 MatrixGenerics_1.6.0
## [17] matrixStats_0.61.0 knitr_1.33
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.0 jsonlite_1.7.2 carData_3.0-4
## [4] bslib_0.2.5.1 assertthat_0.2.1 highr_0.9
## [7] cellranger_1.1.0 GenomeInfoDbData_1.2.6 yaml_2.2.1
## [10] pillar_1.6.1 lattice_0.20-44 glue_1.6.2
## [13] digest_0.6.27 XVector_0.32.0 colorspace_2.0-2
## [16] htmltools_0.5.1.1 Matrix_1.3-4 pkgconfig_2.0.3
## [19] haven_2.4.1 zlibbioc_1.38.0 purrr_0.3.4
## [22] scales_1.1.1 openxlsx_4.2.4 rio_0.5.27
## [25] tibble_3.1.2 generics_0.1.0 farver_2.1.0
## [28] car_3.0-11 ellipsis_0.3.2 withr_2.4.2
## [31] cli_3.0.0 readxl_1.3.1 magrittr_2.0.1
## [34] crayon_1.4.1 evaluate_0.14 fansi_0.4.2
## [37] forcats_0.5.1 foreign_0.8-81 tools_4.1.0
## [40] data.table_1.14.0 hms_1.1.0 lifecycle_1.0.1
## [43] stringr_1.4.0 munsell_0.5.0 locfit_1.5-9.4
## [46] zip_2.2.0 DelayedArray_0.18.0 compiler_4.1.0
## [49] jquerylib_0.1.4 rlang_1.0.2 grid_4.1.0
## [52] RCurl_1.98-1.3 bitops_1.0-7 labeling_0.4.2
## [55] rmarkdown_2.9 gtable_0.3.0 codetools_0.2-18
## [58] abind_1.4-5 DBI_1.1.1 curl_4.3.2
## [61] R6_2.5.0 utf8_1.2.1 stringi_1.7.6
## [64] Rcpp_1.0.7 vctrs_0.3.8 tidyselect_1.1.2
## [67] xfun_0.24